

*********************************************************************************************
* construct a SMD-level measure of partisan indifference:
* first estimate a conditional-logit model of vote-intention in Bavaria (2010 survey data)
* then aggregate predicted probability of voting CSU vs other party 

** Data used:
** Schoen, Harald (2013): Referendum concerning the protection of nonsmokers in Bavaria 2010. 
** GESIS Data Archive, Cologne. ZA5363 Data file Version 1.0.0:
** doi:10.4232/1.11765

** Data created: indiff2010.dta with main variable indiff2010

* Note: The aggregation step requires residence information at the community level for each respondent.
* This information is not included in the version of the dataset available at the DOI mentioned.
* We would like to thank the principal investigator Harald Schoen for providing those data to us.
* This do-file refers to the publicly available version of the dataset (only the set of variables included differs)

*********************************************************************************************

version 14

* Set the working directory here (the folder that contains the subfolders), e.g.
* global repldirjop "insert here"
cd "$repldirjop"

* Close any log that is still open, then start a fresh one.
* Forward slashes are used as the directory separator because Stata accepts them on
* Windows, macOS and Unix alike, whereas a backslash is only a separator on Windows.
capture log close
log using "create_indiff2010_variable/survey_indifference2010.log", replace

use "ZA5363_v1-0-0.dta", clear // please download from doi:10.4232/1.11765
* use "ZA5363/Volksentscheid_Individual+Kreis+Gemeinden_1.dta", clear // same data, with residence information
* gen kkz = floor(gkz/1000)

* Franconia dummy variable:
* 1 when 9400 <= kkz < 9700, 0 for every other kkz that is not system-missing (.)
gen franconia = (kkz >= 9400 & kkz < 9700) if kkz != .

* Vote intention in the state-level election
* (the question leaves open which vote is meant; on request the interviewer says the second vote)
gen pvote = Q24
* swap codes 5 and 6 so that the order matches the pid variables (CSU SPD FDP GRU Lin FW);
* codes 7 to 14 are set to missing
recode pvote (5 = 6) (6 = 5) (7/14 = .)

* one 0/1 indicator per party (later reshaped to long format);
* observations with missing pvote keep their missing value
forvalues p = 1/6 {
    gen pvote`p' = pvote
    replace pvote`p' = (pvote == `p') if !missing(pvote)
}

* Party identification dummies pid1-pid6
* A dummy is defined whenever at least one of the two pid questions (Q64, Q65) was answered
* (the split-sample Germany vs Bavaria wording variant is ignored; code 12 = keine Angabe / no response).
* It is 1 only for respondents naming the party with Q66 equal to 1 or 2 (very strong or rather strong).
local answered "(Q64 != . & Q64 != 12) | (Q65 != . & Q65 != 12)"
local strong   "(Q66 == 1 | Q66 == 2)"

* pid1: all of CDU, CDU/CSU, CSU (codes below 4)
gen pid1 = ((Q64 < 4 | Q65 < 4) & `strong') if `answered'

* pid2-pid6: party i corresponds to answer code i + 2
forvalues i = 2/6 {
    local code = `i' + 2
    gen pid`i' = ((Q64 == `code' | Q65 == `code') & `strong') if `answered'
}

* left-right self-placement (asked on a 1-11 scale)
gen lr_self = Q22
replace lr_self = . if inlist(lr_self, 98, 99) // codes 98 and 99 are set to missing
replace lr_self = lr_self - 1                  // recode to 0-10

* absolute left-right distance to each party, in the order CSU SPD FDP GRU Lin FW
* party positions = means from 2013 MEDW election survey
local k = 0
foreach position in 6.8 3.9 5.7 3.6 1 5.3 {
    local ++k
    gen lrdist`k' = abs(lr_self - `position')
}

* gender: Q56 code 1 -> 0, code 2 -> 1; any other code is left unchanged
gen female = Q56
replace female = female - 1 if inlist(female, 1, 2)

* age cohort
* AGE = 2010 - Q55 (Q55 values of 9990 and above are left missing)
* (an earlier note listed the categories 2010: 18-24, 25-29, 30-39, 40-49, 50-64, 65-74, 75-;
*  the cut points actually used below are coarser)
* bins: [0,30) [30,40) [40,50) [50,60) [60,100); each bin is coded by its lower bound
gen AGE = 2010 - Q55 if Q55 < 9990
egen resp_agecat = cut(AGE), at(0 30 40 50 60 100)
* one dummy per bin: resp_agecat_gr1 ... resp_agecat_gr5
quietly tabulate resp_agecat, generate(resp_agecat_gr)

* 'high education level' (Fachhochschulreife or Abitur)
* still in school, don't know, other, no response are all subsumed under 0
gen higheduc = Q57
recode higheduc (1/3 6/9 = 0) (4 5 = 1)

* religion (ref: no religion; don't know)
* 1 catholic, 2 protestant, 3 other/none/don't know/keine Angabe
gen religion = Q63
replace religion = 3 if inrange(religion, 3, 6)

* Note: unlike the 2013 election survey, this survey contains no information on competence ratings,
* economic assessments, or union membership

* descriptive overview of all model variables (wide format)
summarize pvote* pid* lrdist* female resp_agecat_gr2-resp_agecat_gr5 higheduc i.relig franconia

* respondent identifier = row number in the wide data
generate respid = _n

* keep only what the model needs; pvote? keeps pvote1-pvote6 and drops the original pvote
keep respid persgew ///
    pvote? pid* lrdist* ///
    relig franconia higheduc female resp_age*
* keep respid gkz persgew  pvote?  pid*  lrdist* relig franconia higheduc female resp_age*

* one row per respondent x party; party_num takes the numeric suffix (1-6)
reshape long pvote pid lrdist, i(respid) j(party_num)

*******************************
*** conditional logit model ***
*******************************

* regressors that vary across parties within a respondent
global fcontrols "pid lrdist"
* regressors that are constant within a respondent (passed to casevars())
global fcasecntrls "female resp_agecat_gr2-resp_agecat_gr5 higheduc i.relig franconia  "


* alternative-specific conditional logit, weighted by persgew, party 1 (CSU) as base alternative
xi: asclogit pvote $fcontrols [iw=persgew], ///
    case(respid) alt(party_num) base(1) casevars($fcasecntrls)
* predicted probability for each respondent-party row (predict's default statistic)
predict prob_w

* check: is csu vote correctly predicted?
* defined on the CSU row of respondents with non-missing vote intention;
* 1 when the prediction is on the correct side of .5, otherwise 0 (including rows without a prediction)
gen csu_correct = ((pvote == 1 & prob_w > .5 & prob_w != .) | (pvote == 0 & prob_w < .5)) ///
    if party_num == 1 & pvote != .
tabulate csu_correct if party_num == 1
tabulate csu_correct if party_num == 1 & e(sample) // estimation sample only

* keep the CSU row only: one observation per respondent
keep if party_num == 1
* indifference measure p*(1-p): largest at p = .5, zero at p = 0 or p = 1
gen indiff_w = prob_w * (1 - prob_w)

/*
* The variable indiff_w is now aggregated to the SMD level.
*
* NOTE: this whole section is commented out. It uses the community code gkz, which the
* header of this do-file says is not part of the publicly available dataset. To run it,
* load the data version with residence information and use the alternative keep line that
* retains gkz (both are present as commented-out lines earlier in this file).
* "indiff" in the keep statement below is a variable-name abbreviation of indiff_w.

keep gkz persgew indiff

* merge mapping of community to SMD
* (schlsselnr is the merge key: gkz without the leading 9000000)
gen schlsselnr = gkz-9000000
merge m:1 schlsselnr using "ZA5363\2014-10-06-Mapping Gemeinde Stimmkreis.dta"
drop if _m == 2 // 844 of in total 2056 Gemeinden / local communities not in Survey
drop _m


tab stimmkreisnrltw2013
di (456+164+81)/4000 
* for 17.5% of cases the SMD cannot be uniquely identified, since there are several SMDs in large cities 
* (Muenchen 101-108, Nuernberg 501-504,Augsburg 701-702)
* use the city-level value for all these SMDs in aggregate analysis

* some parts of these large city SMDs can be identified (neighbouring communities):
* Nuernberg Sued 503: 565000 Schwabach 
* Nuernberg Ost 502: 574123 Feucht; also Rueckersdorf, Schwaig
* 702: Gersthofen; Neusaess
* -> add them to the respective city as a whole
gen stk_code = stimmkreisnrltw2013
replace stk_code = "501-504" if stimmkreisnrltw2013 == "502" | stimmkreisnrltw2013 == "503"
replace stk_code = "701-702" if stimmkreisnrltw2013 == "702" 

* weighted mean of indiff_w and number of non-missing values per SMD code
collapse (mean) indiff2010 =indiff_w (count) N_indiff2010=indiff_w  [iw=persgew], by(stk_code)
* NOTE(review): year is set to 2013 although the survey is from 2010 - presumably the
* year key of the aggregate data this file is merged with; confirm
gen year = 2013

* save and merge this with aggregate data
save indiff2010.dta, replace

*/

* close the log opened at the top of this do-file
log close

